In [1]:
    
import graphlab
    
In [2]:
    
# Load the SFrame saved at 'home_data.gl'; every later cell reads from `sales`.
sales = graphlab.SFrame('home_data.gl')
    
    
In [1]:
    
# Bare expression: the notebook renders `sales` as this cell's output.
sales
    
    
In [8]:
    
# Scatter plot of the 'price' column against the 'sqft_living' column.
sales.show(
    view="Scatter Plot",
    x="sqft_living",
    y="price"
)
    
    
In [5]:
    
# Random split with fraction 0.8; the fixed seed makes the split repeatable.
train_data, test_data = sales.random_split(0.8, seed=0)
    
    
In [6]:
    
import graphlab
    
In [7]:
    
# NOTE(review): this repeats the identical load from an earlier cell; on a
# top-to-bottom run one of the two is redundant.
sales = graphlab.SFrame('home_data.gl')
    
    
In [8]:
    
# Bare expression: renders `sales` as the cell output (same display as an
# earlier cell).
sales
    
    Out[8]:
In [9]:
    
# Random split with fraction 0.8 and a fixed seed.
# NOTE(review): identical to an earlier cell's split; same seed, so the result
# should match as long as `sales` is unchanged.
train_data, test_data = sales.random_split(0.8, seed=0)
    
In [10]:
    
# Single-feature linear regression: predict 'price' from 'sqft_living' only.
# validation_set=None disables GraphLab's default behaviour of sampling a
# random validation subset out of train_data, which otherwise makes the fitted
# model (and the evaluation numbers below) vary from run to run.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living'],
    validation_set=None
)
    
    
In [12]:
    
# Mean of the 'price' column over the test rows.
print(test_data['price'].mean())
    
    
In [14]:
    
# Evaluation result of the single-feature model on the held-out test rows.
print(sqft_model.evaluate(test_data))
    
    
In [15]:
    
# Plotting setup: pyplot under its usual alias, with figures rendered inline
# in the notebook output.
import matplotlib.pyplot as plt
%matplotlib inline
    
In [16]:
    
# Actual test prices (dots) versus the single-feature model's predictions
# (line), both plotted against living area.
# The axis labels are set *before* plt.plot so that the plot call remains the
# cell's last expression and the cell output is unchanged.
plt.xlabel('sqft_living')
plt.ylabel('price')
plt.plot(test_data['sqft_living'], test_data['price'], '.',
         test_data['sqft_living'], sqft_model.predict(test_data), '-')
    
    Out[16]:
    
In [17]:
    
# Show the 'coefficients' field of the fitted single-feature model.
sqft_model.get('coefficients')
    
    Out[17]:
In [18]:
    
# Input columns for the multi-feature regression model.
features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
    
In [21]:
    
# Visual summary restricted to the columns listed in `features`.
sales[features].show()
    
    
In [22]:
    
# Box-and-whisker view of 'price' grouped along the 'zipcode' column.
sales.show(
    view='BoxWhisker Plot',
    x='zipcode',
    y='price'
)
    
    
In [23]:
    
# Multi-feature linear regression: predict 'price' from the columns listed in
# `features`.
# validation_set=None disables GraphLab's default behaviour of sampling a
# random validation subset out of train_data, so repeated runs train on the
# same rows and give comparable evaluation numbers.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=features,
    validation_set=None
)
    
    
In [27]:
    
# Echo the feature list used by the multi-feature model.
print(features)
    
    
In [29]:
    
# Test-set evaluation of both models, single-feature model first.
print(sqft_model.evaluate(test_data))
print(my_features_model.evaluate(test_data))
    
    
In [30]:
    
# Select the row(s) whose 'id' equals the string '5309101200'.
house1 = sales[sales['id'] == '5309101200']
    
In [31]:
    
# Bare expression: renders the selected row(s) as the cell output.
house1
    
    Out[31]:
In [32]:
    
# Recorded price for house1.
print(house1['price'])
    
    
In [34]:
    
# Single-feature model's prediction for house1.
print(sqft_model.predict(house1))
    
    
In [35]:
    
# Multi-feature model's prediction for house1.
print(my_features_model.predict(house1))
    
    
In [36]:
    
# Select the row(s) whose 'id' equals the string '1925069082'.
house2 = sales[sales['id'] == '1925069082']
    
In [37]:
    
# Text dump of the row(s) selected for house2.
print(house2)
    
    
In [38]:
    
# Recorded price for house2.
print(house2['price'])
    
    
In [39]:
    
# Single-feature model's prediction for house2.
print(sqft_model.predict(house2))
    
    
In [40]:
    
# Multi-feature model's prediction for house2.
print(my_features_model.predict(house2))
    
    
In [46]:
    
# Rows whose 'zipcode' equals the string '98039'.
expensiveHouses = sales[sales['zipcode'] == '98039']
    
In [47]:
    
# Text dump of the rows selected for zipcode '98039'.
print(expensiveHouses)
    
    
In [48]:
    
# Mean 'price' over the zipcode-'98039' rows.
print(expensiveHouses['price'].mean())
    
    
In [63]:
    
# Keep rows whose 'sqft_living' lies in the inclusive range [2000, 4000],
# then call .show() on the filtered rows.
# NOTE(review): the name suggests a fraction of all houses is wanted, but no
# fraction is computed in this cell - confirm whether that step is missing.
fraction_finder = sales[
    (sales['sqft_living'] >= 2000) & (sales['sqft_living'] <= 4000)
]
fraction_finder.show()
    
    
In [55]:
    
# Feature list for the larger regression model: the six basic columns followed
# by additional per-house attributes.
advanced_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
    'condition',      # condition of house
    'grade',          # measure of quality of construction
    'waterfront',     # waterfront property
    'view',           # type of view
    'sqft_above',     # square feet above ground
    'sqft_basement',  # square feet in basement
    'yr_built',       # the year built
    'yr_renovated',   # the year renovated
    'lat',            # the lat-long of the parcel
    'long',
    'sqft_living15',  # average sq.ft. of 15 nearest neighbors
    'sqft_lot15',     # average lot size of 15 nearest neighbors
]
    
In [56]:
    
# Echo the extended feature list.
print(advanced_features)
    
    
In [58]:
    
# Linear regression on the extended feature list: predict 'price' from the
# columns in `advanced_features`.
# validation_set=None disables GraphLab's default behaviour of sampling a
# random validation subset out of train_data, so repeated runs train on the
# same rows and the model comparison below is reproducible.
my_advance_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=advanced_features,
    validation_set=None
)
    
    
In [59]:
    
# Test-set evaluation of all three models, from fewest to most features.
print(sqft_model.evaluate(test_data))
print(my_features_model.evaluate(test_data))
print(my_advance_model.evaluate(test_data))
    
    
In [ ]: